{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import imagededup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "image_dir = Path('../tests/data/mixed_images')"
   ]
  },
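  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get duplicate mapping using the CNN method\n",
    "\n",
    "Images whose CNN encodings have a cosine similarity at or above `min_similarity_threshold` (0.97 here) are reported as duplicates of each other."
   ]
  },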
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /Users/tanuj.jain/anaconda3/envs/dedup_package_env/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2019-10-01 19:03:05,595: INFO Initialized: MobileNet pretrained on ImageNet dataset sliced at last conv layer and added GlobalAveragePooling\n",
      "2019-10-01 19:03:05,596: INFO Start: Image encoding generation\n",
      "2019-10-01 19:03:05,718: WARNING Invalid image file /Users/tanuj.jain/Documents/image-dedup/examples/../tests/data/mixed_images/ukbench09268_corrupt.jpg:\n",
      "cannot identify image file PosixPath('/Users/tanuj.jain/Documents/image-dedup/examples/../tests/data/mixed_images/ukbench09268_corrupt.jpg')\n",
      "2019-10-01 19:03:05,902: WARNING Invalid image file /Users/tanuj.jain/Documents/image-dedup/examples/../tests/data/mixed_images/ukbench09268_corrupt.jpg:\n",
      "cannot identify image file PosixPath('/Users/tanuj.jain/Documents/image-dedup/examples/../tests/data/mixed_images/ukbench09268_corrupt.jpg')\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r",
      "1/1 [==============================] - 1s 605ms/step\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2019-10-01 19:03:06,241: INFO End: Image encoding generation\n",
      "2019-10-01 19:03:06,241: INFO Start: Calculating cosine similarities...\n",
      "2019-10-01 19:03:06,243: INFO End: Calculating cosine similarities.\n"
     ]
    }
   ],
   "source": [
    "from imagededup.methods import CNN\n",
    "\n",
    "cnn_encoder = CNN()\n",
    "duplicates_cnn = cnn_encoder.find_duplicates(image_dir=image_dir, min_similarity_threshold=0.97)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ukbench00120.jpg': ['ukbench00120_resize.jpg'],\n",
       " 'ukbench00120_hflip.jpg': [],\n",
       " 'ukbench00120_resize.jpg': ['ukbench00120.jpg'],\n",
       " 'ukbench00120_rotation.jpg': [],\n",
       " 'ukbench09268.jpg': []}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "duplicates_cnn"
   ]
  },
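  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Ground truth provided by the user\n",
    "\n",
    "Each key is an image filename and its value is the list of filenames that are true duplicates of that image; an empty list means the image has no duplicates."
   ]
  },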
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "ground_truth = {'ukbench00120.jpg': ['ukbench00120_hflip.jpg','ukbench00120_resize.jpg'],\n",
    " 'ukbench00120_hflip.jpg': ['ukbench00120.jpg','ukbench00120_resize.jpg'],\n",
    " 'ukbench00120_resize.jpg': ['ukbench00120.jpg','ukbench00120_hflip.jpg'],\n",
    " 'ukbench00120_rotation.jpg': [],\n",
    " 'ukbench09268.jpg': []}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ukbench00120.jpg': ['ukbench00120_hflip.jpg', 'ukbench00120_resize.jpg'],\n",
       " 'ukbench00120_hflip.jpg': ['ukbench00120.jpg', 'ukbench00120_resize.jpg'],\n",
       " 'ukbench00120_resize.jpg': ['ukbench00120.jpg', 'ukbench00120_hflip.jpg'],\n",
       " 'ukbench00120_rotation.jpg': [],\n",
       " 'ukbench09268.jpg': []}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ground_truth"
   ]
  },
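  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Run evaluation to get metrics\n",
    "\n",
    "`evaluate` compares the retrieved duplicate map against the ground truth map and returns the resulting metrics as a dictionary."
   ]
  },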
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2019-10-01 19:03:06,293: INFO Validating ground truth map ..\n",
      "2019-10-01 19:03:06,294: INFO Ground truth map validated\n",
      "2019-10-01 19:03:06,295: INFO Validating retrieved map ..\n",
      "2019-10-01 19:03:06,296: INFO Duplicate map validated\n",
      "2019-10-01 19:03:06,296: INFO Validating ground truth map and retrieved map consistency..\n",
      "2019-10-01 19:03:06,297: INFO Ground truth map and retrieved map found to be consistent.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.78      1.00      0.88         7\n",
      "           1       1.00      0.33      0.50         3\n",
      "\n",
      "    accuracy                           0.80        10\n",
      "   macro avg       0.89      0.67      0.69        10\n",
      "weighted avg       0.84      0.80      0.76        10\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from imagededup.evaluation import evaluate\n",
    "metrics = evaluate(ground_truth_map=ground_truth, retrieved_map=duplicates_cnn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'map': 0.6,\n",
       " 'ndcg': 0.8,\n",
       " 'jaccard': 0.6,\n",
       " 'precision': array([0.77777778, 1.        ]),\n",
       " 'recall': array([1.        , 0.33333333]),\n",
       " 'f1_score': array([0.875, 0.5  ]),\n",
       " 'support': array([7, 3])}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
